#!/usr/bin/python
#
# Copyright (c) 2009 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""cleanup_dats.py - Sort and clean perf dat files of irregularities."""

import os
import sys

# Put the buildbot pylibs directory at the very front of the module search
# path so that it is consulted before every other entry.
sys.path.insert(
    0, os.path.join(os.path.dirname(__file__), '../../buildbot/pylibs'))

import errno
import optparse
import re
import simplejson
import subprocess


# Version string handed to optparse.OptionParser (see Main), which reports it
# for the --version flag.
__version__ = '1.0'

# Usage text handed to optparse.OptionParser (see Main).  optparse replaces
# each %prog placeholder with the name of the running program.
USAGE = r"""%prog [dir1] ... [dirN]

Clean up perf summary.dat files in |dir1| through |dirN| and their
subdirectories.  If no directories are given, the current working
directory and its subdirectories are searched, instead.

Examples:
  Clean up all perf summary.dat files in the current directory:
    %prog

  Clean up perf summary.dat files in |dir1| and |dir2|:
    %prog |dir1| |dir2|
"""


def Backquote(cmd, cwd=None):
  """Run |cmd| and return what it wrote to standard output.

  Mirrors shell backquotes: |cmd| is handed to subprocess.Popen as-is,
  optionally executed from the directory |cwd|, and the captured output is
  returned with leading and trailing whitespace stripped.  Standard error is
  not captured and the exit status is not inspected.
  """
  process = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE)
  captured_stdout, _ = process.communicate()
  return captured_stdout.strip()


def ReadJson(filename):
  """Read a perf .dat file into memory as a list of ordered hashes.

  Every non-blank line of |filename| is parsed as one JSON object.  Floats are
  kept as their original text (parse_float=str) and key order is preserved
  (object_pairs_hook=simplejson.OrderedDict).  Known irregularities are
  corrected along the way:
    * a trailing comma before a closing brace ("], }") is removed from the
      line before it is parsed;
    * entries whose 'rev' is the string "None" are dropped, which is only
      tolerated for files under ./linux-release-webkit-latest/.

  Args:
    filename: path of the file to read.  A path that starts with neither '/'
        nor './' is prefixed with './' first.

  Returns:
    A list with one ordered hash per retained line, in file order.

  Raises:
    IOError: the file could not be opened or read (a message is written to
        stderr first when opening fails).
    ValueError: a line is not valid JSON (the offending line is written to
        stderr first).
    Exception: a "None" rev was found in a file outside
        ./linux-release-webkit-latest/.
  """
  if not filename.startswith('/') and not filename.startswith('./'):
    filename = "./%s" % filename

  try:
    infile = open(filename, 'r')
  except IOError as e:
    sys.stderr.write("I/O Error reading file %s(%s): %s\n" %
                     (filename, e.errno, e.strerror))
    raise

  # Slurp the file and release the handle immediately, so that none of the
  # exceptions raised while parsing below can leak it.
  try:
    contents = infile.read()
  finally:
    infile.close()

  data = []
  for jsontext in contents.split("\n"):
    # Skip blank lines, including whitespace-only ones such as a stray '\r'.
    if not jsontext.strip():
      continue

    # Some .dat files contain ending commas (invalid JSON).
    jsontext = re.sub(r'(.*)], }(.*)', r'\1]}\2', jsontext)
    try:
      record = simplejson.loads(jsontext, parse_float=str,
                                object_pairs_hook=simplejson.OrderedDict)
    except ValueError:
      sys.stderr.write("Error parsing file %s: '%s'\n" % (filename, jsontext))
      # Bare raise keeps the parser's original traceback.
      raise

    # Some .dat files contain invalid rev values.
    if record['rev'] == "None":
      if not filename.startswith('./linux-release-webkit-latest/'):
        raise Exception("error reading %s: rev None found" % filename)
      continue
    data.append(record)
  return data


def WriteJson(filename, data):
  """Write the list of hashes in |data| to |filename|, one JSON object per line.

  Every entry is serialized with simplejson.dumps; the entries are joined with
  newlines and followed by one trailing newline (so an empty |data| produces a
  file holding a single newline).

  Args:
    filename: path of the file to create or overwrite.
    data: iterable of JSON-serializable hashes.

  Returns:
    True once the file has been written.

  Raises:
    IOError: the file could not be opened or written; a message is written to
        stderr before the error is re-raised.
  """
  # Serialize before touching the file so that a serialization failure cannot
  # leave a truncated file behind.
  contents = "\n".join([simplejson.dumps(record) for record in data]) + "\n"

  try:
    # The with-block closes (and therefore flushes) the handle on every path.
    with open(filename, 'w') as outfile:
      outfile.write(contents)
  except IOError as e:
    sys.stderr.write("I/O Error writing file %s(%s): %s\n" %
                     (filename, e.errno, e.strerror))
    # Propagate the failure rather than carrying on without a file handle.
    raise
  return True


def _RemoveQuietly(path):
  """Delete |path|; a failure to delete is deliberately ignored."""
  try:
    os.remove(path)
  except OSError:
    # Best effort only: cleanup must not mask the caller's real outcome.
    pass


def ProcessJson(filename):
  """Read, sort, and rewrite one JSON data file in place.

  A snapshot of |filename| is copied to |filename| + ".tmp".  The entries are
  read with ReadJson and sorted by integer 'rev', highest first (entries with
  equal revs keep their relative order).  The sorted data is written back with
  WriteJson only if |filename| still matches the snapshot, i.e. it was not
  modified while being processed.  The snapshot is removed afterwards, also
  when reading or writing fails, and |filename| is printed on success.

  Args:
    filename: path of the .dat file to clean up.

  Raises:
    Exception: the ".tmp" snapshot already exists, or the snapshot could not
        be created.  Errors from ReadJson and WriteJson propagate unchanged.
  """
  # Imported here because only this function needs them.
  import filecmp
  import shutil

  tempfilename = filename + ".tmp"
  if os.path.exists(tempfilename):
    raise Exception("%s already exists" % tempfilename)

  # Copy through the standard library instead of a shell command line, so
  # file names containing quotes or other shell metacharacters are safe.
  try:
    shutil.copyfile(filename, tempfilename)
  except EnvironmentError:
    _RemoveQuietly(tempfilename)  # Drop any partially written snapshot.
    raise Exception("could not copy %s to %s" % (filename, tempfilename))

  try:
    data = ReadJson(filename)
    # Descending by revision; reverse=True keeps the sort stable.
    data.sort(key=lambda record: int(record['rev']), reverse=True)
    try:
      unchanged = filecmp.cmp(filename, tempfilename, shallow=False)
    except EnvironmentError:
      # A file that can no longer be compared is treated as modified.
      unchanged = False
    if unchanged:
      WriteJson(filename, data)
  finally:
    # Always remove the snapshot; a leftover one would make the next run
    # fail with "already exists".
    _RemoveQuietly(tempfilename)
  print(filename)

def GetDatFilelist(dir=None):
  """Find all summary .dat files to clean up.

  Walks |dir| and its subdirectories and collects every file whose name
  matches '*-summary.dat' (case-sensitively).  Directories that happen to
  match the pattern are not returned, since they cannot be processed as data
  files.  If |dir| is itself a matching file, it is returned on its own.

  Args:
    dir: directory to search.

  Returns:
    A list of paths, each starting with |dir|; the list is empty when nothing
    matches.

  Raises:
    Exception: |dir| is empty/None or does not exist.
  """
  # Imported here because only this function needs it.
  import fnmatch

  if not dir:
    raise Exception("No directory supplied.")
  if not os.path.exists(dir):
    raise Exception("Directory does not exist.")

  pattern = '*-summary.dat'
  if os.path.isfile(dir):
    if fnmatch.fnmatchcase(os.path.basename(dir), pattern):
      return [dir]
    return []

  matches = []
  for root, _, names in os.walk(dir):
    # Sorting makes the order within a directory deterministic.
    for name in sorted(names):
      if fnmatch.fnmatchcase(name, pattern):
        matches.append(os.path.join(root, name))
  return matches


def Main(args):
  """Clean every summary .dat file under the directories named in |args|.

  |args| is the full argument vector, program name included.  After option
  parsing, everything following the first positional argument is taken as a
  directory to work in; when none is given the current working directory is
  used.  Each file reported by GetDatFilelist is passed to ProcessJson.

  Returns 0.
  """
  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  options, args = parser.parse_args(args)

  # args[0] is the program name, so the directories start at index 1.
  options.dir = list(args[1:])
  if not options.dir:
    # Nothing requested: fall back to the current working directory.
    options.dir.append('.')

  for directory in options.dir:
    for dat_file in GetDatFilelist(dir=directory):
      ProcessJson(dat_file)
  return 0


# Run the cleanup only when executed as a script, and hand Main's return value
# to the shell as the exit status.
if __name__ == '__main__':
  exit_status = Main(sys.argv)
  sys.exit(exit_status)
